{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "\"\"\"\n",
    "PeekyDecoder 的初始化和上一节的 Decoder 基本上是一样的，不同之处仅在于 LSTM 层权重和 Affine 层权重的形状。因为这次的实现要接收编码器编码好的向量，所以权重参数的形状相应地变大了。\n",
    "接着是 forward() 的实现。这里首先使用 np.repeat() 根据时序大小复制相应份数的 h，并将其设置为 hs。然后，将 hs 和 Embedding 层的输出用 np.concatenate() 拼接，并输入 LSTM 层。同样地，Affine 层的输入也是 hs 和 LSTM 层的输出的拼接。\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "outputs": [],
   "source": [
    "import sys\n",
    "\n",
    "sys.path.append('..')\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from dataset import sequence\n",
    "from common.optimizer import Adam\n",
    "from common.trainer import Trainer\n",
    "from common.util import eval_seq2seq\n",
    "from peeky_seq2seq import PeekySeq2seq"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-05-09T17:15:53.785810500Z",
     "start_time": "2023-05-09T17:15:53.774793900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "outputs": [],
   "source": [
    "# Load the 'addition.txt' dataset as (input, target) pairs for training and testing.\n",
    "(x_train, t_train), (x_test, t_test) = sequence.load_data('addition.txt')\n",
    "\n",
    "# Reverse every input row along axis 1 (assumed to be the character/time axis - confirm).\n",
    "# Only the inputs are flipped; the targets are left untouched.\n",
    "x_train = x_train[:, ::-1]\n",
    "x_test = x_test[:, ::-1]\n",
    "\n",
    "# Vocabulary tables; presumably char -> id and id -> char mappings (verify in dataset.sequence).\n",
    "char_to_id, id_to_char = sequence.get_vocab()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-05-09T17:24:22.056235900Z",
     "start_time": "2023-05-09T17:24:21.852240900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "outputs": [],
   "source": [
    "# Hyperparameters\n",
    "\n",
    "# Model dimensions: the vocabulary size is the number of entries in char_to_id.\n",
    "vocab_size = len(char_to_id)\n",
    "wordvec_size, hidden_size = 16, 128\n",
    "\n",
    "# Training settings (assumed to be consumed by the training cell further down - confirm).\n",
    "batch_size, max_epoch = 128, 25\n",
    "max_grad = 5.0  # presumably a gradient-clipping threshold; verify where it is passed"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-05-09T17:24:28.840161300Z",
     "start_time": "2023-05-09T17:24:28.835163200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "outputs": [],
   "source": [
    "# Build the model, the optimizer, and the trainer that wraps both.\n",
    "model = PeekySeq2seq(\n",
    "    vocab_size,\n",
    "    wordvec_size,\n",
    "    hidden_size,\n",
    ")\n",
    "optimizer = Adam()  # constructed with its default settings\n",
    "trainer = Trainer(model, optimizer)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-05-09T17:24:29.242517900Z",
     "start_time": "2023-05-09T17:24:29.213518600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "| epoch 1 |  iter 1 / 351 | time 0[s] | loss 2.57\n",
      "| epoch 1 |  iter 21 / 351 | time 0[s] | loss 2.48\n",
      "| epoch 1 |  iter 41 / 351 | time 0[s] | loss 2.20\n",
      "| epoch 1 |  iter 61 / 351 | time 1[s] | loss 1.99\n",
      "| epoch 1 |  iter 81 / 351 | time 1[s] | loss 1.89\n",
      "| epoch 1 |  iter 101 / 351 | time 2[s] | loss 1.82\n",
      "| epoch 1 |  iter 121 / 351 | time 2[s] | loss 1.82\n",
      "| epoch 1 |  iter 141 / 351 | time 3[s] | loss 1.80\n",
      "| epoch 1 |  iter 161 / 351 | time 3[s] | loss 1.79\n",
      "| epoch 1 |  iter 181 / 351 | time 4[s] | loss 1.78\n",
      "| epoch 1 |  iter 201 / 351 | time 4[s] | loss 1.77\n",
      "| epoch 1 |  iter 221 / 351 | time 4[s] | loss 1.76\n",
      "| epoch 1 |  iter 241 / 351 | time 5[s] | loss 1.76\n",
      "| epoch 1 |  iter 261 / 351 | time 5[s] | loss 1.75\n",
      "| epoch 1 |  iter 281 / 351 | time 6[s] | loss 1.74\n",
      "| epoch 1 |  iter 301 / 351 | time 6[s] | loss 1.74\n",
      "| epoch 1 |  iter 321 / 351 | time 7[s] | loss 1.73\n",
      "| epoch 1 |  iter 341 / 351 | time 7[s] | loss 1.73\n",
      "Q   58+77\n",
      "T 162 \n",
      "X 100 \n",
      "---\n",
      "Q 461+579\n",
      "T 1139\n",
      "X 1013\n",
      "---\n",
      "Q  48+285\n",
      "T 666 \n",
      "X 102 \n",
      "---\n",
      "Q   551+8\n",
      "T 163 \n",
      "X 100 \n",
      "---\n",
      "Q  55+763\n",
      "T 422 \n",
      "X 1023\n",
      "---\n",
      "Q 752+006\n",
      "T 857 \n",
      "X 1023\n",
      "---\n",
      "Q 292+167\n",
      "T 1053\n",
      "X 1023\n",
      "---\n",
      "Q 795+038\n",
      "T 1427\n",
      "X 1111\n",
      "---\n",
      "Q  838+62\n",
      "T 864 \n",
      "X 102 \n",
      "---\n",
      "Q  39+341\n",
      "T 236 \n",
      "X 102 \n",
      "---\n",
      "val acc 0.280%\n",
      "| epoch 2 |  iter 1 / 351 | time 0[s] | loss 1.71\n",
      "| epoch 2 |  iter 21 / 351 | time 0[s] | loss 1.71\n",
      "| epoch 2 |  iter 41 / 351 | time 0[s] | loss 1.71\n",
      "| epoch 2 |  iter 61 / 351 | time 1[s] | loss 1.71\n",
      "| epoch 2 |  iter 81 / 351 | time 1[s] | loss 1.70\n",
      "| epoch 2 |  iter 101 / 351 | time 2[s] | loss 1.68\n",
      "| epoch 2 |  iter 121 / 351 | time 2[s] | loss 1.69\n",
      "| epoch 2 |  iter 141 / 351 | time 3[s] | loss 1.68\n",
      "| epoch 2 |  iter 161 / 351 | time 3[s] | loss 1.67\n",
      "| epoch 2 |  iter 181 / 351 | time 4[s] | loss 1.67\n",
      "| epoch 2 |  iter 201 / 351 | time 4[s] | loss 1.65\n",
      "| epoch 2 |  iter 221 / 351 | time 5[s] | loss 1.65\n",
      "| epoch 2 |  iter 241 / 351 | time 5[s] | loss 1.65\n",
      "| epoch 2 |  iter 261 / 351 | time 6[s] | loss 1.63\n",
      "| epoch 2 |  iter 281 / 351 | time 6[s] | loss 1.62\n",
      "| epoch 2 |  iter 301 / 351 | time 6[s] | loss 1.61\n",
      "| epoch 2 |  iter 321 / 351 | time 7[s] | loss 1.61\n",
      "| epoch 2 |  iter 341 / 351 | time 7[s] | loss 1.60\n",
      "Q   58+77\n",
      "T 162 \n",
      "X 100 \n",
      "---\n",
      "Q 461+579\n",
      "T 1139\n",
      "X 1200\n",
      "---\n",
      "Q  48+285\n",
      "T 666 \n",
      "X 690 \n",
      "---\n",
      "Q   551+8\n",
      "T 163 \n",
      "X 100 \n",
      "---\n",
      "Q  55+763\n",
      "T 422 \n",
      "X 690 \n",
      "---\n",
      "Q 752+006\n",
      "T 857 \n",
      "X 999 \n",
      "---\n",
      "Q 292+167\n",
      "T 1053\n",
      "X 1029\n",
      "---\n",
      "Q 795+038\n",
      "T 1427\n",
      "X 1240\n",
      "---\n",
      "Q  838+62\n",
      "T 864 \n",
      "X 792 \n",
      "---\n",
      "Q  39+341\n",
      "T 236 \n",
      "X 290 \n",
      "---\n",
      "val acc 0.400%\n",
      "| epoch 3 |  iter 1 / 351 | time 0[s] | loss 1.58\n",
      "| epoch 3 |  iter 21 / 351 | time 0[s] | loss 1.59\n",
      "| epoch 3 |  iter 41 / 351 | time 0[s] | loss 1.58\n",
      "| epoch 3 |  iter 61 / 351 | time 1[s] | loss 1.56\n",
      "| epoch 3 |  iter 81 / 351 | time 1[s] | loss 1.55\n",
      "| epoch 3 |  iter 101 / 351 | time 2[s] | loss 1.53\n",
      "| epoch 3 |  iter 121 / 351 | time 2[s] | loss 1.51\n",
      "| epoch 3 |  iter 141 / 351 | time 3[s] | loss 1.50\n",
      "| epoch 3 |  iter 161 / 351 | time 3[s] | loss 1.49\n",
      "| epoch 3 |  iter 181 / 351 | time 4[s] | loss 1.47\n",
      "| epoch 3 |  iter 201 / 351 | time 4[s] | loss 1.46\n",
      "| epoch 3 |  iter 221 / 351 | time 5[s] | loss 1.43\n",
      "| epoch 3 |  iter 241 / 351 | time 5[s] | loss 1.42\n",
      "| epoch 3 |  iter 261 / 351 | time 6[s] | loss 1.41\n",
      "| epoch 3 |  iter 281 / 351 | time 6[s] | loss 1.39\n",
      "| epoch 3 |  iter 301 / 351 | time 7[s] | loss 1.37\n",
      "| epoch 3 |  iter 321 / 351 | time 7[s] | loss 1.36\n",
      "| epoch 3 |  iter 341 / 351 | time 8[s] | loss 1.35\n",
      "Q   58+77\n",
      "T 162 \n",
      "X 154 \n",
      "---\n",
      "Q 461+579\n",
      "T 1139\n",
      "X 1033\n",
      "---\n",
      "Q  48+285\n",
      "T 666 \n",
      "X 644 \n",
      "---\n",
      "Q   551+8\n",
      "T 163 \n",
      "X 161 \n",
      "---\n",
      "Q  55+763\n",
      "T 422 \n",
      "X 433 \n",
      "---\n",
      "Q 752+006\n",
      "T 857 \n",
      "X 818 \n",
      "---\n",
      "Q 292+167\n",
      "T 1053\n",
      "X 1018\n",
      "---\n",
      "Q 795+038\n",
      "T 1427\n",
      "X 1344\n",
      "---\n",
      "Q  838+62\n",
      "T 864 \n",
      "X 834 \n",
      "---\n",
      "Q  39+341\n",
      "T 236 \n",
      "X 211 \n",
      "---\n",
      "val acc 1.600%\n",
      "| epoch 4 |  iter 1 / 351 | time 0[s] | loss 1.32\n",
      "| epoch 4 |  iter 21 / 351 | time 0[s] | loss 1.32\n",
      "| epoch 4 |  iter 41 / 351 | time 0[s] | loss 1.30\n",
      "| epoch 4 |  iter 61 / 351 | time 1[s] | loss 1.30\n",
      "| epoch 4 |  iter 81 / 351 | time 1[s] | loss 1.28\n",
      "| epoch 4 |  iter 101 / 351 | time 2[s] | loss 1.27\n",
      "| epoch 4 |  iter 121 / 351 | time 2[s] | loss 1.25\n",
      "| epoch 4 |  iter 141 / 351 | time 3[s] | loss 1.24\n",
      "| epoch 4 |  iter 161 / 351 | time 3[s] | loss 1.22\n",
      "| epoch 4 |  iter 181 / 351 | time 4[s] | loss 1.21\n",
      "| epoch 4 |  iter 201 / 351 | time 4[s] | loss 1.20\n",
      "| epoch 4 |  iter 221 / 351 | time 5[s] | loss 1.20\n",
      "| epoch 4 |  iter 241 / 351 | time 5[s] | loss 1.17\n",
      "| epoch 4 |  iter 261 / 351 | time 6[s] | loss 1.16\n",
      "| epoch 4 |  iter 281 / 351 | time 6[s] | loss 1.14\n",
      "| epoch 4 |  iter 301 / 351 | time 7[s] | loss 1.12\n",
      "| epoch 4 |  iter 321 / 351 | time 7[s] | loss 1.11\n",
      "| epoch 4 |  iter 341 / 351 | time 8[s] | loss 1.10\n",
      "Q   58+77\n",
      "T 162 \n",
      "X 158 \n",
      "---\n",
      "Q 461+579\n",
      "T 1139\n",
      "X 1123\n",
      "---\n",
      "Q  48+285\n",
      "T 666 \n",
      "X 657 \n",
      "---\n",
      "Q   551+8\n",
      "T 163 \n",
      "X 165 \n",
      "---\n",
      "Q  55+763\n",
      "T 422 \n",
      "X 423 \n",
      "---\n",
      "Q 752+006\n",
      "T 857 \n",
      "X 777 \n",
      "---\n",
      "Q 292+167\n",
      "T 1053\n",
      "X 1023\n",
      "---\n",
      "Q 795+038\n",
      "T 1427\n",
      "X 1388\n",
      "---\n",
      "Q  838+62\n",
      "T 864 \n",
      "X 887 \n",
      "---\n",
      "Q  39+341\n",
      "T 236 \n",
      "X 223 \n",
      "---\n",
      "val acc 5.140%\n",
      "| epoch 5 |  iter 1 / 351 | time 0[s] | loss 1.08\n",
      "| epoch 5 |  iter 21 / 351 | time 0[s] | loss 1.07\n",
      "| epoch 5 |  iter 41 / 351 | time 1[s] | loss 1.05\n",
      "| epoch 5 |  iter 61 / 351 | time 1[s] | loss 1.04\n",
      "| epoch 5 |  iter 81 / 351 | time 1[s] | loss 1.02\n",
      "| epoch 5 |  iter 101 / 351 | time 2[s] | loss 1.01\n",
      "| epoch 5 |  iter 121 / 351 | time 2[s] | loss 1.00\n",
      "| epoch 5 |  iter 141 / 351 | time 3[s] | loss 0.99\n",
      "| epoch 5 |  iter 161 / 351 | time 3[s] | loss 0.99\n",
      "| epoch 5 |  iter 181 / 351 | time 4[s] | loss 0.96\n",
      "| epoch 5 |  iter 201 / 351 | time 4[s] | loss 0.95\n",
      "| epoch 5 |  iter 221 / 351 | time 5[s] | loss 0.94\n",
      "| epoch 5 |  iter 241 / 351 | time 5[s] | loss 0.92\n",
      "| epoch 5 |  iter 261 / 351 | time 6[s] | loss 0.91\n",
      "| epoch 5 |  iter 281 / 351 | time 6[s] | loss 0.90\n",
      "| epoch 5 |  iter 301 / 351 | time 7[s] | loss 0.89\n",
      "| epoch 5 |  iter 321 / 351 | time 7[s] | loss 0.88\n",
      "| epoch 5 |  iter 341 / 351 | time 8[s] | loss 0.87\n",
      "Q   58+77\n",
      "T 162 \n",
      "X 160 \n",
      "---\n",
      "Q 461+579\n",
      "T 1139\n",
      "X 1135\n",
      "---\n",
      "Q  48+285\n",
      "T 666 \n",
      "X 668 \n",
      "---\n",
      "Q   551+8\n",
      "T 163 \n",
      "X 169 \n",
      "---\n",
      "Q  55+763\n",
      "T 422 \n",
      "O 422 \n",
      "---\n",
      "Q 752+006\n",
      "T 857 \n",
      "X 861 \n",
      "---\n",
      "Q 292+167\n",
      "T 1053\n",
      "X 1045\n",
      "---\n",
      "Q 795+038\n",
      "T 1427\n",
      "X 1324\n",
      "---\n",
      "Q  838+62\n",
      "T 864 \n",
      "X 861 \n",
      "---\n",
      "Q  39+341\n",
      "T 236 \n",
      "X 239 \n",
      "---\n",
      "val acc 9.380%\n",
      "| epoch 6 |  iter 1 / 351 | time 0[s] | loss 0.90\n",
      "| epoch 6 |  iter 21 / 351 | time 0[s] | loss 0.86\n",
      "| epoch 6 |  iter 41 / 351 | time 1[s] | loss 0.83\n",
      "| epoch 6 |  iter 61 / 351 | time 1[s] | loss 0.84\n",
      "| epoch 6 |  iter 81 / 351 | time 2[s] | loss 0.82\n",
      "| epoch 6 |  iter 101 / 351 | time 2[s] | loss 0.81\n",
      "| epoch 6 |  iter 121 / 351 | time 2[s] | loss 0.80\n",
      "| epoch 6 |  iter 141 / 351 | time 3[s] | loss 0.79\n",
      "| epoch 6 |  iter 161 / 351 | time 3[s] | loss 0.78\n",
      "| epoch 6 |  iter 181 / 351 | time 4[s] | loss 0.77\n",
      "| epoch 6 |  iter 201 / 351 | time 4[s] | loss 0.76\n",
      "| epoch 6 |  iter 221 / 351 | time 5[s] | loss 0.76\n",
      "| epoch 6 |  iter 241 / 351 | time 5[s] | loss 0.74\n",
      "| epoch 6 |  iter 261 / 351 | time 6[s] | loss 0.74\n",
      "| epoch 6 |  iter 281 / 351 | time 6[s] | loss 0.73\n",
      "| epoch 6 |  iter 301 / 351 | time 7[s] | loss 0.72\n",
      "| epoch 6 |  iter 321 / 351 | time 7[s] | loss 0.72\n",
      "| epoch 6 |  iter 341 / 351 | time 8[s] | loss 0.71\n",
      "Q   58+77\n",
      "T 162 \n",
      "X 163 \n",
      "---\n",
      "Q 461+579\n",
      "T 1139\n",
      "X 1138\n",
      "---\n",
      "Q  48+285\n",
      "T 666 \n",
      "X 668 \n",
      "---\n",
      "Q   551+8\n",
      "T 163 \n",
      "X 166 \n",
      "---\n",
      "Q  55+763\n",
      "T 422 \n",
      "X 423 \n",
      "---\n",
      "Q 752+006\n",
      "T 857 \n",
      "X 858 \n",
      "---\n",
      "Q 292+167\n",
      "T 1053\n",
      "X 1048\n",
      "---\n",
      "Q 795+038\n",
      "T 1427\n",
      "X 1428\n",
      "---\n",
      "Q  838+62\n",
      "T 864 \n",
      "X 873 \n",
      "---\n",
      "Q  39+341\n",
      "T 236 \n",
      "X 239 \n",
      "---\n",
      "val acc 15.040%\n",
      "| epoch 7 |  iter 1 / 351 | time 0[s] | loss 0.68\n",
      "| epoch 7 |  iter 21 / 351 | time 0[s] | loss 0.69\n",
      "| epoch 7 |  iter 41 / 351 | time 1[s] | loss 0.67\n",
      "| epoch 7 |  iter 61 / 351 | time 1[s] | loss 0.66\n",
      "| epoch 7 |  iter 81 / 351 | time 2[s] | loss 0.66\n",
      "| epoch 7 |  iter 101 / 351 | time 2[s] | loss 0.65\n",
      "| epoch 7 |  iter 121 / 351 | time 3[s] | loss 0.65\n",
      "| epoch 7 |  iter 141 / 351 | time 3[s] | loss 0.64\n",
      "| epoch 7 |  iter 161 / 351 | time 4[s] | loss 0.63\n",
      "| epoch 7 |  iter 181 / 351 | time 4[s] | loss 0.61\n",
      "| epoch 7 |  iter 201 / 351 | time 5[s] | loss 0.61\n",
      "| epoch 7 |  iter 221 / 351 | time 5[s] | loss 0.60\n",
      "| epoch 7 |  iter 241 / 351 | time 6[s] | loss 0.57\n",
      "| epoch 7 |  iter 261 / 351 | time 6[s] | loss 0.57\n",
      "| epoch 7 |  iter 281 / 351 | time 7[s] | loss 0.57\n",
      "| epoch 7 |  iter 301 / 351 | time 7[s] | loss 0.55\n",
      "| epoch 7 |  iter 321 / 351 | time 8[s] | loss 0.54\n",
      "| epoch 7 |  iter 341 / 351 | time 8[s] | loss 0.53\n",
      "Q   58+77\n",
      "T 162 \n",
      "O 162 \n",
      "---\n",
      "Q 461+579\n",
      "T 1139\n",
      "O 1139\n",
      "---\n",
      "Q  48+285\n",
      "T 666 \n",
      "X 665 \n",
      "---\n",
      "Q   551+8\n",
      "T 163 \n",
      "X 156 \n",
      "---\n",
      "Q  55+763\n",
      "T 422 \n",
      "O 422 \n",
      "---\n",
      "Q 752+006\n",
      "T 857 \n",
      "X 858 \n",
      "---\n",
      "Q 292+167\n",
      "T 1053\n",
      "X 1052\n",
      "---\n",
      "Q 795+038\n",
      "T 1427\n",
      "X 1428\n",
      "---\n",
      "Q  838+62\n",
      "T 864 \n",
      "O 864 \n",
      "---\n",
      "Q  39+341\n",
      "T 236 \n",
      "X 235 \n",
      "---\n",
      "val acc 39.100%\n",
      "| epoch 8 |  iter 1 / 351 | time 0[s] | loss 0.51\n",
      "| epoch 8 |  iter 21 / 351 | time 0[s] | loss 0.50\n",
      "| epoch 8 |  iter 41 / 351 | time 1[s] | loss 0.49\n",
      "| epoch 8 |  iter 61 / 351 | time 1[s] | loss 0.48\n",
      "| epoch 8 |  iter 81 / 351 | time 2[s] | loss 0.47\n",
      "| epoch 8 |  iter 101 / 351 | time 2[s] | loss 0.46\n",
      "| epoch 8 |  iter 121 / 351 | time 3[s] | loss 0.46\n",
      "| epoch 8 |  iter 141 / 351 | time 3[s] | loss 0.44\n",
      "| epoch 8 |  iter 161 / 351 | time 4[s] | loss 0.41\n",
      "| epoch 8 |  iter 181 / 351 | time 4[s] | loss 0.42\n",
      "| epoch 8 |  iter 201 / 351 | time 5[s] | loss 0.41\n",
      "| epoch 8 |  iter 221 / 351 | time 5[s] | loss 0.40\n",
      "| epoch 8 |  iter 241 / 351 | time 6[s] | loss 0.39\n",
      "| epoch 8 |  iter 261 / 351 | time 6[s] | loss 0.37\n",
      "| epoch 8 |  iter 281 / 351 | time 7[s] | loss 0.36\n",
      "| epoch 8 |  iter 301 / 351 | time 7[s] | loss 0.36\n",
      "| epoch 8 |  iter 321 / 351 | time 8[s] | loss 0.35\n",
      "| epoch 8 |  iter 341 / 351 | time 8[s] | loss 0.34\n",
      "Q   58+77\n",
      "T 162 \n",
      "X 161 \n",
      "---\n",
      "Q 461+579\n",
      "T 1139\n",
      "O 1139\n",
      "---\n",
      "Q  48+285\n",
      "T 666 \n",
      "X 657 \n",
      "---\n",
      "Q   551+8\n",
      "T 163 \n",
      "X 155 \n",
      "---\n",
      "Q  55+763\n",
      "T 422 \n",
      "O 422 \n",
      "---\n",
      "Q 752+006\n",
      "T 857 \n",
      "O 857 \n",
      "---\n",
      "Q 292+167\n",
      "T 1053\n",
      "O 1053\n",
      "---\n",
      "Q 795+038\n",
      "T 1427\n",
      "X 1438\n",
      "---\n",
      "Q  838+62\n",
      "T 864 \n",
      "O 864 \n",
      "---\n",
      "Q  39+341\n",
      "T 236 \n",
      "O 236 \n",
      "---\n",
      "val acc 65.060%\n",
      "| epoch 9 |  iter 1 / 351 | time 0[s] | loss 0.32\n",
      "| epoch 9 |  iter 21 / 351 | time 0[s] | loss 0.31\n",
      "| epoch 9 |  iter 41 / 351 | time 1[s] | loss 0.31\n",
      "| epoch 9 |  iter 61 / 351 | time 1[s] | loss 0.31\n",
      "| epoch 9 |  iter 81 / 351 | time 2[s] | loss 0.29\n",
      "| epoch 9 |  iter 101 / 351 | time 2[s] | loss 0.29\n",
      "| epoch 9 |  iter 121 / 351 | time 3[s] | loss 0.29\n",
      "| epoch 9 |  iter 141 / 351 | time 3[s] | loss 0.27\n",
      "| epoch 9 |  iter 161 / 351 | time 4[s] | loss 0.27\n",
      "| epoch 9 |  iter 181 / 351 | time 4[s] | loss 0.26\n",
      "| epoch 9 |  iter 201 / 351 | time 5[s] | loss 0.25\n",
      "| epoch 9 |  iter 221 / 351 | time 5[s] | loss 0.25\n",
      "| epoch 9 |  iter 241 / 351 | time 6[s] | loss 0.24\n",
      "| epoch 9 |  iter 261 / 351 | time 6[s] | loss 0.24\n",
      "| epoch 9 |  iter 281 / 351 | time 7[s] | loss 0.23\n",
      "| epoch 9 |  iter 301 / 351 | time 7[s] | loss 0.22\n",
      "| epoch 9 |  iter 321 / 351 | time 8[s] | loss 0.22\n",
      "| epoch 9 |  iter 341 / 351 | time 8[s] | loss 0.21\n",
      "Q   58+77\n",
      "T 162 \n",
      "O 162 \n",
      "---\n",
      "Q 461+579\n",
      "T 1139\n",
      "X 1140\n",
      "---\n",
      "Q  48+285\n",
      "T 666 \n",
      "X 657 \n",
      "---\n",
      "Q   551+8\n",
      "T 163 \n",
      "O 163 \n",
      "---\n",
      "Q  55+763\n",
      "T 422 \n",
      "O 422 \n",
      "---\n",
      "Q 752+006\n",
      "T 857 \n",
      "O 857 \n",
      "---\n",
      "Q 292+167\n",
      "T 1053\n",
      "O 1053\n",
      "---\n",
      "Q 795+038\n",
      "T 1427\n",
      "O 1427\n",
      "---\n",
      "Q  838+62\n",
      "T 864 \n",
      "O 864 \n",
      "---\n",
      "Q  39+341\n",
      "T 236 \n",
      "O 236 \n",
      "---\n",
      "val acc 83.280%\n",
      "| epoch 10 |  iter 1 / 351 | time 0[s] | loss 0.22\n",
      "| epoch 10 |  iter 21 / 351 | time 0[s] | loss 0.20\n",
      "| epoch 10 |  iter 41 / 351 | time 1[s] | loss 0.20\n",
      "| epoch 10 |  iter 61 / 351 | time 1[s] | loss 0.20\n",
      "| epoch 10 |  iter 81 / 351 | time 2[s] | loss 0.18\n",
      "| epoch 10 |  iter 101 / 351 | time 2[s] | loss 0.17\n",
      "| epoch 10 |  iter 121 / 351 | time 3[s] | loss 0.18\n",
      "| epoch 10 |  iter 141 / 351 | time 3[s] | loss 0.17\n",
      "| epoch 10 |  iter 161 / 351 | time 4[s] | loss 0.17\n",
      "| epoch 10 |  iter 181 / 351 | time 4[s] | loss 0.17\n",
      "| epoch 10 |  iter 201 / 351 | time 5[s] | loss 0.17\n",
      "| epoch 10 |  iter 221 / 351 | time 5[s] | loss 0.16\n",
      "| epoch 10 |  iter 241 / 351 | time 6[s] | loss 0.15\n",
      "| epoch 10 |  iter 261 / 351 | time 6[s] | loss 0.15\n",
      "| epoch 10 |  iter 281 / 351 | time 7[s] | loss 0.15\n",
      "| epoch 10 |  iter 301 / 351 | time 7[s] | loss 0.15\n",
      "| epoch 10 |  iter 321 / 351 | time 8[s] | loss 0.14\n",
      "| epoch 10 |  iter 341 / 351 | time 8[s] | loss 0.14\n",
      "Q   58+77\n",
      "T 162 \n",
      "O 162 \n",
      "---\n",
      "Q 461+579\n",
      "T 1139\n",
      "O 1139\n",
      "---\n",
      "Q  48+285\n",
      "T 666 \n",
      "X 656 \n",
      "---\n",
      "Q   551+8\n",
      "T 163 \n",
      "O 163 \n",
      "---\n",
      "Q  55+763\n",
      "T 422 \n",
      "O 422 \n",
      "---\n",
      "Q 752+006\n",
      "T 857 \n",
      "O 857 \n",
      "---\n",
      "Q 292+167\n",
      "T 1053\n",
      "O 1053\n",
      "---\n",
      "Q 795+038\n",
      "T 1427\n",
      "O 1427\n",
      "---\n",
      "Q  838+62\n",
      "T 864 \n",
      "O 864 \n",
      "---\n",
      "Q  39+341\n",
      "T 236 \n",
      "O 236 \n",
      "---\n",
      "val acc 88.400%\n",
      "| epoch 11 |  iter 1 / 351 | time 0[s] | loss 0.13\n",
      "| epoch 11 |  iter 21 / 351 | time 0[s] | loss 0.13\n",
      "| epoch 11 |  iter 41 / 351 | time 1[s] | loss 0.13\n",
      "| epoch 11 |  iter 61 / 351 | time 1[s] | loss 0.12\n",
      "| epoch 11 |  iter 81 / 351 | time 2[s] | loss 0.12\n",
      "| epoch 11 |  iter 101 / 351 | time 2[s] | loss 0.12\n",
      "| epoch 11 |  iter 121 / 351 | time 3[s] | loss 0.11\n",
      "| epoch 11 |  iter 141 / 351 | time 3[s] | loss 0.12\n",
      "| epoch 11 |  iter 161 / 351 | time 4[s] | loss 0.11\n",
      "| epoch 11 |  iter 181 / 351 | time 4[s] | loss 0.11\n",
      "| epoch 11 |  iter 201 / 351 | time 5[s] | loss 0.12\n",
      "| epoch 11 |  iter 221 / 351 | time 5[s] | loss 0.11\n",
      "| epoch 11 |  iter 241 / 351 | time 6[s] | loss 0.11\n",
      "| epoch 11 |  iter 261 / 351 | time 6[s] | loss 0.10\n",
      "| epoch 11 |  iter 281 / 351 | time 7[s] | loss 0.10\n",
      "| epoch 11 |  iter 301 / 351 | time 7[s] | loss 0.10\n",
      "| epoch 11 |  iter 321 / 351 | time 8[s] | loss 0.09\n",
      "| epoch 11 |  iter 341 / 351 | time 8[s] | loss 0.09\n",
      "Q   58+77\n",
      "T 162 \n",
      "O 162 \n",
      "---\n",
      "Q 461+579\n",
      "T 1139\n",
      "O 1139\n",
      "---\n",
      "Q  48+285\n",
      "T 666 \n",
      "O 666 \n",
      "---\n",
      "Q   551+8\n",
      "T 163 \n",
      "O 163 \n",
      "---\n",
      "Q  55+763\n",
      "T 422 \n",
      "O 422 \n",
      "---\n",
      "Q 752+006\n",
      "T 857 \n",
      "O 857 \n",
      "---\n",
      "Q 292+167\n",
      "T 1053\n",
      "O 1053\n",
      "---\n",
      "Q 795+038\n",
      "T 1427\n",
      "O 1427\n",
      "---\n",
      "Q  838+62\n",
      "T 864 \n",
      "O 864 \n",
      "---\n",
      "Q  39+341\n",
      "T 236 \n",
      "O 236 \n",
      "---\n",
      "val acc 90.940%\n",
      "| epoch 12 |  iter 1 / 351 | time 0[s] | loss 0.09\n",
      "| epoch 12 |  iter 21 / 351 | time 0[s] | loss 0.09\n",
      "| epoch 12 |  iter 41 / 351 | time 1[s] | loss 0.09\n",
      "| epoch 12 |  iter 61 / 351 | time 1[s] | loss 0.09\n",
      "| epoch 12 |  iter 81 / 351 | time 2[s] | loss 0.09\n",
      "| epoch 12 |  iter 101 / 351 | time 2[s] | loss 0.08\n",
      "| epoch 12 |  iter 121 / 351 | time 3[s] | loss 0.08\n",
      "| epoch 12 |  iter 141 / 351 | time 3[s] | loss 0.08\n",
      "| epoch 12 |  iter 161 / 351 | time 4[s] | loss 0.08\n",
      "| epoch 12 |  iter 181 / 351 | time 4[s] | loss 0.08\n",
      "| epoch 12 |  iter 201 / 351 | time 5[s] | loss 0.08\n",
      "| epoch 12 |  iter 221 / 351 | time 5[s] | loss 0.09\n",
      "| epoch 12 |  iter 241 / 351 | time 6[s] | loss 0.09\n",
      "| epoch 12 |  iter 261 / 351 | time 6[s] | loss 0.09\n",
      "| epoch 12 |  iter 281 / 351 | time 7[s] | loss 0.08\n",
      "| epoch 12 |  iter 301 / 351 | time 7[s] | loss 0.08\n",
      "| epoch 12 |  iter 321 / 351 | time 8[s] | loss 0.07\n",
      "| epoch 12 |  iter 341 / 351 | time 8[s] | loss 0.08\n",
      "Q   58+77\n",
      "T 162 \n",
      "O 162 \n",
      "---\n",
      "Q 461+579\n",
      "T 1139\n",
      "O 1139\n",
      "---\n",
      "Q  48+285\n",
      "T 666 \n",
      "O 666 \n",
      "---\n",
      "Q   551+8\n",
      "T 163 \n",
      "O 163 \n",
      "---\n",
      "Q  55+763\n",
      "T 422 \n",
      "O 422 \n",
      "---\n",
      "Q 752+006\n",
      "T 857 \n",
      "O 857 \n",
      "---\n",
      "Q 292+167\n",
      "T 1053\n",
      "O 1053\n",
      "---\n",
      "Q 795+038\n",
      "T 1427\n",
      "O 1427\n",
      "---\n",
      "Q  838+62\n",
      "T 864 \n",
      "O 864 \n",
      "---\n",
      "Q  39+341\n",
      "T 236 \n",
      "O 236 \n",
      "---\n",
      "val acc 92.240%\n",
      "| epoch 13 |  iter 1 / 351 | time 0[s] | loss 0.07\n",
      "| epoch 13 |  iter 21 / 351 | time 0[s] | loss 0.07\n",
      "| epoch 13 |  iter 41 / 351 | time 1[s] | loss 0.07\n",
      "| epoch 13 |  iter 61 / 351 | time 1[s] | loss 0.07\n",
      "| epoch 13 |  iter 81 / 351 | time 2[s] | loss 0.06\n",
      "| epoch 13 |  iter 101 / 351 | time 2[s] | loss 0.06\n",
      "| epoch 13 |  iter 121 / 351 | time 3[s] | loss 0.07\n",
      "| epoch 13 |  iter 141 / 351 | time 3[s] | loss 0.06\n",
      "| epoch 13 |  iter 161 / 351 | time 4[s] | loss 0.06\n",
      "| epoch 13 |  iter 181 / 351 | time 4[s] | loss 0.06\n",
      "| epoch 13 |  iter 201 / 351 | time 5[s] | loss 0.06\n",
      "| epoch 13 |  iter 221 / 351 | time 5[s] | loss 0.06\n",
      "| epoch 13 |  iter 241 / 351 | time 6[s] | loss 0.06\n",
      "| epoch 13 |  iter 261 / 351 | time 6[s] | loss 0.06\n",
      "| epoch 13 |  iter 281 / 351 | time 7[s] | loss 0.06\n",
      "| epoch 13 |  iter 301 / 351 | time 7[s] | loss 0.05\n",
      "| epoch 13 |  iter 321 / 351 | time 8[s] | loss 0.05\n",
      "| epoch 13 |  iter 341 / 351 | time 8[s] | loss 0.06\n",
      "Q   58+77\n",
      "T 162 \n",
      "O 162 \n",
      "---\n",
      "Q 461+579\n",
      "T 1139\n",
      "O 1139\n",
      "---\n",
      "Q  48+285\n",
      "T 666 \n",
      "O 666 \n",
      "---\n",
      "Q   551+8\n",
      "T 163 \n",
      "O 163 \n",
      "---\n",
      "Q  55+763\n",
      "T 422 \n",
      "O 422 \n",
      "---\n",
      "Q 752+006\n",
      "T 857 \n",
      "O 857 \n",
      "---\n",
      "Q 292+167\n",
      "T 1053\n",
      "O 1053\n",
      "---\n",
      "Q 795+038\n",
      "T 1427\n",
      "O 1427\n",
      "---\n",
      "Q  838+62\n",
      "T 864 \n",
      "O 864 \n",
      "---\n",
      "Q  39+341\n",
      "T 236 \n",
      "O 236 \n",
      "---\n",
      "val acc 94.380%\n",
      "| epoch 14 |  iter 1 / 351 | time 0[s] | loss 0.05\n",
      "| epoch 14 |  iter 21 / 351 | time 0[s] | loss 0.05\n",
      "| epoch 14 |  iter 41 / 351 | time 1[s] | loss 0.05\n",
      "| epoch 14 |  iter 61 / 351 | time 1[s] | loss 0.05\n",
      "| epoch 14 |  iter 81 / 351 | time 2[s] | loss 0.05\n",
      "| epoch 14 |  iter 101 / 351 | time 2[s] | loss 0.05\n",
      "| epoch 14 |  iter 121 / 351 | time 3[s] | loss 0.05\n",
      "| epoch 14 |  iter 141 / 351 | time 3[s] | loss 0.05\n",
      "| epoch 14 |  iter 161 / 351 | time 4[s] | loss 0.05\n",
      "| epoch 14 |  iter 181 / 351 | time 4[s] | loss 0.05\n",
      "| epoch 14 |  iter 201 / 351 | time 5[s] | loss 0.05\n",
      "| epoch 14 |  iter 221 / 351 | time 5[s] | loss 0.06\n",
      "| epoch 14 |  iter 241 / 351 | time 6[s] | loss 0.06\n",
      "| epoch 14 |  iter 261 / 351 | time 6[s] | loss 0.07\n",
      "| epoch 14 |  iter 281 / 351 | time 7[s] | loss 0.07\n",
      "| epoch 14 |  iter 301 / 351 | time 7[s] | loss 0.06\n",
      "| epoch 14 |  iter 321 / 351 | time 8[s] | loss 0.05\n",
      "| epoch 14 |  iter 341 / 351 | time 8[s] | loss 0.04\n",
      "Q   58+77\n",
      "T 162 \n",
      "O 162 \n",
      "---\n",
      "Q 461+579\n",
      "T 1139\n",
      "O 1139\n",
      "---\n",
      "Q  48+285\n",
      "T 666 \n",
      "O 666 \n",
      "---\n",
      "Q   551+8\n",
      "T 163 \n",
      "O 163 \n",
      "---\n",
      "Q  55+763\n",
      "T 422 \n",
      "O 422 \n",
      "---\n",
      "Q 752+006\n",
      "T 857 \n",
      "O 857 \n",
      "---\n",
      "Q 292+167\n",
      "T 1053\n",
      "O 1053\n",
      "---\n",
      "Q 795+038\n",
      "T 1427\n",
      "O 1427\n",
      "---\n",
      "Q  838+62\n",
      "T 864 \n",
      "O 864 \n",
      "---\n",
      "Q  39+341\n",
      "T 236 \n",
      "O 236 \n",
      "---\n",
      "val acc 96.200%\n",
      "| epoch 15 |  iter 1 / 351 | time 0[s] | loss 0.03\n",
      "| epoch 15 |  iter 21 / 351 | time 0[s] | loss 0.04\n",
      "| epoch 15 |  iter 41 / 351 | time 1[s] | loss 0.04\n",
      "| epoch 15 |  iter 61 / 351 | time 1[s] | loss 0.05\n",
      "| epoch 15 |  iter 81 / 351 | time 2[s] | loss 0.04\n",
      "| epoch 15 |  iter 101 / 351 | time 2[s] | loss 0.04\n",
      "| epoch 15 |  iter 121 / 351 | time 3[s] | loss 0.04\n",
      "| epoch 15 |  iter 141 / 351 | time 3[s] | loss 0.04\n",
      "| epoch 15 |  iter 161 / 351 | time 4[s] | loss 0.04\n",
      "| epoch 15 |  iter 181 / 351 | time 4[s] | loss 0.04\n",
      "| epoch 15 |  iter 201 / 351 | time 5[s] | loss 0.04\n",
      "| epoch 15 |  iter 221 / 351 | time 5[s] | loss 0.04\n",
      "| epoch 15 |  iter 241 / 351 | time 6[s] | loss 0.03\n",
      "| epoch 15 |  iter 261 / 351 | time 6[s] | loss 0.04\n",
      "| epoch 15 |  iter 281 / 351 | time 7[s] | loss 0.05\n",
      "| epoch 15 |  iter 301 / 351 | time 7[s] | loss 0.05\n",
      "| epoch 15 |  iter 321 / 351 | time 8[s] | loss 0.04\n",
      "| epoch 15 |  iter 341 / 351 | time 8[s] | loss 0.04\n",
      "Q   58+77\n",
      "T 162 \n",
      "O 162 \n",
      "---\n",
      "Q 461+579\n",
      "T 1139\n",
      "X 1149\n",
      "---\n",
      "Q  48+285\n",
      "T 666 \n",
      "O 666 \n",
      "---\n",
      "Q   551+8\n",
      "T 163 \n",
      "O 163 \n",
      "---\n",
      "Q  55+763\n",
      "T 422 \n",
      "O 422 \n",
      "---\n",
      "Q 752+006\n",
      "T 857 \n",
      "O 857 \n",
      "---\n",
      "Q 292+167\n",
      "T 1053\n",
      "O 1053\n",
      "---\n",
      "Q 795+038\n",
      "T 1427\n",
      "O 1427\n",
      "---\n",
      "Q  838+62\n",
      "T 864 \n",
      "O 864 \n",
      "---\n",
      "Q  39+341\n",
      "T 236 \n",
      "O 236 \n",
      "---\n",
      "val acc 96.580%\n",
      "| epoch 16 |  iter 1 / 351 | time 0[s] | loss 0.03\n",
      "| epoch 16 |  iter 21 / 351 | time 0[s] | loss 0.04\n",
      "| epoch 16 |  iter 41 / 351 | time 1[s] | loss 0.04\n",
      "| epoch 16 |  iter 61 / 351 | time 1[s] | loss 0.05\n",
      "| epoch 16 |  iter 81 / 351 | time 2[s] | loss 0.05\n",
      "| epoch 16 |  iter 101 / 351 | time 2[s] | loss 0.05\n",
      "| epoch 16 |  iter 121 / 351 | time 3[s] | loss 0.05\n",
      "| epoch 16 |  iter 141 / 351 | time 3[s] | loss 0.04\n",
      "| epoch 16 |  iter 161 / 351 | time 4[s] | loss 0.04\n",
      "| epoch 16 |  iter 181 / 351 | time 4[s] | loss 0.05\n",
      "| epoch 16 |  iter 201 / 351 | time 5[s] | loss 0.05\n",
      "| epoch 16 |  iter 221 / 351 | time 5[s] | loss 0.07\n",
      "| epoch 16 |  iter 241 / 351 | time 6[s] | loss 0.06\n",
      "| epoch 16 |  iter 261 / 351 | time 6[s] | loss 0.06\n",
      "| epoch 16 |  iter 281 / 351 | time 7[s] | loss 0.05\n",
      "| epoch 16 |  iter 301 / 351 | time 7[s] | loss 0.04\n",
      "| epoch 16 |  iter 321 / 351 | time 8[s] | loss 0.03\n",
      "| epoch 16 |  iter 341 / 351 | time 8[s] | loss 0.03\n",
      "Q   58+77\n",
      "T 162 \n",
      "O 162 \n",
      "---\n",
      "Q 461+579\n",
      "T 1139\n",
      "O 1139\n",
      "---\n",
      "Q  48+285\n",
      "T 666 \n",
      "O 666 \n",
      "---\n",
      "Q   551+8\n",
      "T 163 \n",
      "O 163 \n",
      "---\n",
      "Q  55+763\n",
      "T 422 \n",
      "O 422 \n",
      "---\n",
      "Q 752+006\n",
      "T 857 \n",
      "O 857 \n",
      "---\n",
      "Q 292+167\n",
      "T 1053\n",
      "O 1053\n",
      "---\n",
      "Q 795+038\n",
      "T 1427\n",
      "O 1427\n",
      "---\n",
      "Q  838+62\n",
      "T 864 \n",
      "O 864 \n",
      "---\n",
      "Q  39+341\n",
      "T 236 \n",
      "O 236 \n",
      "---\n",
      "val acc 96.280%\n",
      "| epoch 17 |  iter 1 / 351 | time 0[s] | loss 0.05\n",
      "| epoch 17 |  iter 21 / 351 | time 0[s] | loss 0.03\n",
      "| epoch 17 |  iter 41 / 351 | time 1[s] | loss 0.03\n",
      "| epoch 17 |  iter 61 / 351 | time 1[s] | loss 0.03\n",
      "| epoch 17 |  iter 81 / 351 | time 2[s] | loss 0.03\n",
      "| epoch 17 |  iter 101 / 351 | time 2[s] | loss 0.03\n",
      "| epoch 17 |  iter 121 / 351 | time 3[s] | loss 0.03\n",
      "| epoch 17 |  iter 141 / 351 | time 3[s] | loss 0.03\n",
      "| epoch 17 |  iter 161 / 351 | time 4[s] | loss 0.03\n",
      "| epoch 17 |  iter 181 / 351 | time 4[s] | loss 0.03\n",
      "| epoch 17 |  iter 201 / 351 | time 5[s] | loss 0.03\n",
      "| epoch 17 |  iter 221 / 351 | time 5[s] | loss 0.03\n",
      "| epoch 17 |  iter 241 / 351 | time 6[s] | loss 0.02\n",
      "| epoch 17 |  iter 261 / 351 | time 6[s] | loss 0.02\n",
      "| epoch 17 |  iter 281 / 351 | time 7[s] | loss 0.02\n",
      "| epoch 17 |  iter 301 / 351 | time 7[s] | loss 0.03\n",
      "| epoch 17 |  iter 321 / 351 | time 8[s] | loss 0.02\n",
      "| epoch 17 |  iter 341 / 351 | time 8[s] | loss 0.03\n",
      "Q   58+77\n",
      "T 162 \n",
      "O 162 \n",
      "---\n",
      "Q 461+579\n",
      "T 1139\n",
      "O 1139\n",
      "---\n",
      "Q  48+285\n",
      "T 666 \n",
      "O 666 \n",
      "---\n",
      "Q   551+8\n",
      "T 163 \n",
      "O 163 \n",
      "---\n",
      "Q  55+763\n",
      "T 422 \n",
      "O 422 \n",
      "---\n",
      "Q 752+006\n",
      "T 857 \n",
      "O 857 \n",
      "---\n",
      "Q 292+167\n",
      "T 1053\n",
      "O 1053\n",
      "---\n",
      "Q 795+038\n",
      "T 1427\n",
      "O 1427\n",
      "---\n",
      "Q  838+62\n",
      "T 864 \n",
      "O 864 \n",
      "---\n",
      "Q  39+341\n",
      "T 236 \n",
      "O 236 \n",
      "---\n",
      "val acc 96.620%\n",
      "| epoch 18 |  iter 1 / 351 | time 0[s] | loss 0.03\n",
      "| epoch 18 |  iter 21 / 351 | time 0[s] | loss 0.03\n",
      "| epoch 18 |  iter 41 / 351 | time 1[s] | loss 0.02\n",
      "| epoch 18 |  iter 61 / 351 | time 1[s] | loss 0.02\n",
      "| epoch 18 |  iter 81 / 351 | time 2[s] | loss 0.02\n",
      "| epoch 18 |  iter 101 / 351 | time 2[s] | loss 0.02\n",
      "| epoch 18 |  iter 121 / 351 | time 3[s] | loss 0.02\n",
      "| epoch 18 |  iter 141 / 351 | time 3[s] | loss 0.02\n",
      "| epoch 18 |  iter 161 / 351 | time 4[s] | loss 0.02\n",
      "| epoch 18 |  iter 181 / 351 | time 4[s] | loss 0.03\n",
      "| epoch 18 |  iter 201 / 351 | time 5[s] | loss 0.05\n",
      "| epoch 18 |  iter 221 / 351 | time 5[s] | loss 0.04\n",
      "| epoch 18 |  iter 241 / 351 | time 6[s] | loss 0.04\n",
      "| epoch 18 |  iter 261 / 351 | time 6[s] | loss 0.04\n",
      "| epoch 18 |  iter 281 / 351 | time 7[s] | loss 0.05\n",
      "| epoch 18 |  iter 301 / 351 | time 7[s] | loss 0.06\n",
      "| epoch 18 |  iter 321 / 351 | time 8[s] | loss 0.05\n",
      "| epoch 18 |  iter 341 / 351 | time 8[s] | loss 0.04\n",
      "Q   58+77\n",
      "T 162 \n",
      "O 162 \n",
      "---\n",
      "Q 461+579\n",
      "T 1139\n",
      "O 1139\n",
      "---\n",
      "Q  48+285\n",
      "T 666 \n",
      "O 666 \n",
      "---\n",
      "Q   551+8\n",
      "T 163 \n",
      "O 163 \n",
      "---\n",
      "Q  55+763\n",
      "T 422 \n",
      "O 422 \n",
      "---\n",
      "Q 752+006\n",
      "T 857 \n",
      "O 857 \n",
      "---\n",
      "Q 292+167\n",
      "T 1053\n",
      "O 1053\n",
      "---\n",
      "Q 795+038\n",
      "T 1427\n",
      "O 1427\n",
      "---\n",
      "Q  838+62\n",
      "T 864 \n",
      "O 864 \n",
      "---\n",
      "Q  39+341\n",
      "T 236 \n",
      "O 236 \n",
      "---\n",
      "val acc 95.660%\n",
      "| epoch 19 |  iter 1 / 351 | time 0[s] | loss 0.03\n",
      "| epoch 19 |  iter 21 / 351 | time 0[s] | loss 0.04\n",
      "| epoch 19 |  iter 41 / 351 | time 1[s] | loss 0.03\n",
      "| epoch 19 |  iter 61 / 351 | time 1[s] | loss 0.02\n",
      "| epoch 19 |  iter 81 / 351 | time 2[s] | loss 0.02\n",
      "| epoch 19 |  iter 101 / 351 | time 2[s] | loss 0.02\n",
      "| epoch 19 |  iter 121 / 351 | time 3[s] | loss 0.02\n",
      "| epoch 19 |  iter 141 / 351 | time 3[s] | loss 0.02\n",
      "| epoch 19 |  iter 161 / 351 | time 4[s] | loss 0.02\n",
      "| epoch 19 |  iter 181 / 351 | time 4[s] | loss 0.02\n",
      "| epoch 19 |  iter 201 / 351 | time 5[s] | loss 0.02\n",
      "| epoch 19 |  iter 221 / 351 | time 5[s] | loss 0.02\n",
      "| epoch 19 |  iter 241 / 351 | time 6[s] | loss 0.02\n",
      "| epoch 19 |  iter 261 / 351 | time 6[s] | loss 0.02\n",
      "| epoch 19 |  iter 281 / 351 | time 7[s] | loss 0.03\n",
      "| epoch 19 |  iter 301 / 351 | time 7[s] | loss 0.03\n",
      "| epoch 19 |  iter 321 / 351 | time 8[s] | loss 0.03\n",
      "| epoch 19 |  iter 341 / 351 | time 8[s] | loss 0.02\n",
      "Q   58+77\n",
      "T 162 \n",
      "O 162 \n",
      "---\n",
      "Q 461+579\n",
      "T 1139\n",
      "O 1139\n",
      "---\n",
      "Q  48+285\n",
      "T 666 \n",
      "O 666 \n",
      "---\n",
      "Q   551+8\n",
      "T 163 \n",
      "O 163 \n",
      "---\n",
      "Q  55+763\n",
      "T 422 \n",
      "O 422 \n",
      "---\n",
      "Q 752+006\n",
      "T 857 \n",
      "O 857 \n",
      "---\n",
      "Q 292+167\n",
      "T 1053\n",
      "O 1053\n",
      "---\n",
      "Q 795+038\n",
      "T 1427\n",
      "O 1427\n",
      "---\n",
      "Q  838+62\n",
      "T 864 \n",
      "O 864 \n",
      "---\n",
      "Q  39+341\n",
      "T 236 \n",
      "O 236 \n",
      "---\n",
      "val acc 97.940%\n",
      "| epoch 20 |  iter 1 / 351 | time 0[s] | loss 0.02\n",
      "| epoch 20 |  iter 21 / 351 | time 0[s] | loss 0.02\n",
      "| epoch 20 |  iter 41 / 351 | time 1[s] | loss 0.02\n",
      "| epoch 20 |  iter 61 / 351 | time 1[s] | loss 0.03\n",
      "| epoch 20 |  iter 81 / 351 | time 2[s] | loss 0.02\n",
      "| epoch 20 |  iter 101 / 351 | time 2[s] | loss 0.02\n",
      "| epoch 20 |  iter 121 / 351 | time 3[s] | loss 0.02\n",
      "| epoch 20 |  iter 141 / 351 | time 3[s] | loss 0.02\n",
      "| epoch 20 |  iter 161 / 351 | time 4[s] | loss 0.02\n",
      "| epoch 20 |  iter 181 / 351 | time 4[s] | loss 0.02\n",
      "| epoch 20 |  iter 201 / 351 | time 5[s] | loss 0.02\n",
      "| epoch 20 |  iter 221 / 351 | time 5[s] | loss 0.02\n",
      "| epoch 20 |  iter 241 / 351 | time 6[s] | loss 0.02\n",
      "| epoch 20 |  iter 261 / 351 | time 6[s] | loss 0.02\n",
      "| epoch 20 |  iter 281 / 351 | time 7[s] | loss 0.02\n",
      "| epoch 20 |  iter 301 / 351 | time 7[s] | loss 0.02\n",
      "| epoch 20 |  iter 321 / 351 | time 8[s] | loss 0.02\n",
      "| epoch 20 |  iter 341 / 351 | time 8[s] | loss 0.03\n",
      "Q   58+77\n",
      "T 162 \n",
      "O 162 \n",
      "---\n",
      "Q 461+579\n",
      "T 1139\n",
      "O 1139\n",
      "---\n",
      "Q  48+285\n",
      "T 666 \n",
      "O 666 \n",
      "---\n",
      "Q   551+8\n",
      "T 163 \n",
      "O 163 \n",
      "---\n",
      "Q  55+763\n",
      "T 422 \n",
      "O 422 \n",
      "---\n",
      "Q 752+006\n",
      "T 857 \n",
      "O 857 \n",
      "---\n",
      "Q 292+167\n",
      "T 1053\n",
      "O 1053\n",
      "---\n",
      "Q 795+038\n",
      "T 1427\n",
      "O 1427\n",
      "---\n",
      "Q  838+62\n",
      "T 864 \n",
      "O 864 \n",
      "---\n",
      "Q  39+341\n",
      "T 236 \n",
      "O 236 \n",
      "---\n",
      "val acc 90.340%\n",
      "| epoch 21 |  iter 1 / 351 | time 0[s] | loss 0.08\n",
      "| epoch 21 |  iter 21 / 351 | time 0[s] | loss 0.07\n",
      "| epoch 21 |  iter 41 / 351 | time 1[s] | loss 0.05\n",
      "| epoch 21 |  iter 61 / 351 | time 1[s] | loss 0.04\n",
      "| epoch 21 |  iter 81 / 351 | time 2[s] | loss 0.03\n",
      "| epoch 21 |  iter 101 / 351 | time 2[s] | loss 0.02\n",
      "| epoch 21 |  iter 121 / 351 | time 3[s] | loss 0.03\n",
      "| epoch 21 |  iter 141 / 351 | time 3[s] | loss 0.03\n",
      "| epoch 21 |  iter 161 / 351 | time 4[s] | loss 0.02\n",
      "| epoch 21 |  iter 181 / 351 | time 4[s] | loss 0.02\n",
      "| epoch 21 |  iter 201 / 351 | time 5[s] | loss 0.02\n",
      "| epoch 21 |  iter 221 / 351 | time 5[s] | loss 0.02\n",
      "| epoch 21 |  iter 241 / 351 | time 6[s] | loss 0.03\n",
      "| epoch 21 |  iter 261 / 351 | time 6[s] | loss 0.03\n",
      "| epoch 21 |  iter 281 / 351 | time 7[s] | loss 0.02\n",
      "| epoch 21 |  iter 301 / 351 | time 7[s] | loss 0.02\n",
      "| epoch 21 |  iter 321 / 351 | time 8[s] | loss 0.03\n",
      "| epoch 21 |  iter 341 / 351 | time 8[s] | loss 0.02\n",
      "Q   58+77\n",
      "T 162 \n",
      "O 162 \n",
      "---\n",
      "Q 461+579\n",
      "T 1139\n",
      "O 1139\n",
      "---\n",
      "Q  48+285\n",
      "T 666 \n",
      "O 666 \n",
      "---\n",
      "Q   551+8\n",
      "T 163 \n",
      "O 163 \n",
      "---\n",
      "Q  55+763\n",
      "T 422 \n",
      "O 422 \n",
      "---\n",
      "Q 752+006\n",
      "T 857 \n",
      "O 857 \n",
      "---\n",
      "Q 292+167\n",
      "T 1053\n",
      "O 1053\n",
      "---\n",
      "Q 795+038\n",
      "T 1427\n",
      "O 1427\n",
      "---\n",
      "Q  838+62\n",
      "T 864 \n",
      "O 864 \n",
      "---\n",
      "Q  39+341\n",
      "T 236 \n",
      "O 236 \n",
      "---\n",
      "val acc 98.200%\n",
      "| epoch 22 |  iter 1 / 351 | time 0[s] | loss 0.01\n",
      "| epoch 22 |  iter 21 / 351 | time 0[s] | loss 0.02\n",
      "| epoch 22 |  iter 41 / 351 | time 1[s] | loss 0.02\n",
      "| epoch 22 |  iter 61 / 351 | time 1[s] | loss 0.01\n",
      "| epoch 22 |  iter 81 / 351 | time 2[s] | loss 0.01\n",
      "| epoch 22 |  iter 101 / 351 | time 2[s] | loss 0.01\n",
      "| epoch 22 |  iter 121 / 351 | time 3[s] | loss 0.01\n",
      "| epoch 22 |  iter 141 / 351 | time 3[s] | loss 0.01\n",
      "| epoch 22 |  iter 161 / 351 | time 4[s] | loss 0.01\n",
      "| epoch 22 |  iter 181 / 351 | time 4[s] | loss 0.01\n",
      "| epoch 22 |  iter 201 / 351 | time 5[s] | loss 0.01\n",
      "| epoch 22 |  iter 221 / 351 | time 5[s] | loss 0.01\n",
      "| epoch 22 |  iter 241 / 351 | time 6[s] | loss 0.01\n",
      "| epoch 22 |  iter 261 / 351 | time 6[s] | loss 0.01\n",
      "| epoch 22 |  iter 281 / 351 | time 7[s] | loss 0.01\n",
      "| epoch 22 |  iter 301 / 351 | time 7[s] | loss 0.01\n",
      "| epoch 22 |  iter 321 / 351 | time 8[s] | loss 0.01\n",
      "| epoch 22 |  iter 341 / 351 | time 8[s] | loss 0.01\n",
      "Q   58+77\n",
      "T 162 \n",
      "O 162 \n",
      "---\n",
      "Q 461+579\n",
      "T 1139\n",
      "O 1139\n",
      "---\n",
      "Q  48+285\n",
      "T 666 \n",
      "O 666 \n",
      "---\n",
      "Q   551+8\n",
      "T 163 \n",
      "O 163 \n",
      "---\n",
      "Q  55+763\n",
      "T 422 \n",
      "O 422 \n",
      "---\n",
      "Q 752+006\n",
      "T 857 \n",
      "O 857 \n",
      "---\n",
      "Q 292+167\n",
      "T 1053\n",
      "O 1053\n",
      "---\n",
      "Q 795+038\n",
      "T 1427\n",
      "O 1427\n",
      "---\n",
      "Q  838+62\n",
      "T 864 \n",
      "O 864 \n",
      "---\n",
      "Q  39+341\n",
      "T 236 \n",
      "O 236 \n",
      "---\n",
      "val acc 97.720%\n",
      "| epoch 23 |  iter 1 / 351 | time 0[s] | loss 0.01\n",
      "| epoch 23 |  iter 21 / 351 | time 0[s] | loss 0.02\n",
      "| epoch 23 |  iter 41 / 351 | time 1[s] | loss 0.01\n",
      "| epoch 23 |  iter 61 / 351 | time 1[s] | loss 0.02\n",
      "| epoch 23 |  iter 81 / 351 | time 2[s] | loss 0.02\n",
      "| epoch 23 |  iter 101 / 351 | time 2[s] | loss 0.03\n",
      "| epoch 23 |  iter 121 / 351 | time 3[s] | loss 0.04\n",
      "| epoch 23 |  iter 141 / 351 | time 3[s] | loss 0.04\n",
      "| epoch 23 |  iter 161 / 351 | time 4[s] | loss 0.04\n",
      "| epoch 23 |  iter 181 / 351 | time 4[s] | loss 0.04\n",
      "| epoch 23 |  iter 201 / 351 | time 5[s] | loss 0.03\n",
      "| epoch 23 |  iter 221 / 351 | time 5[s] | loss 0.04\n",
      "| epoch 23 |  iter 241 / 351 | time 6[s] | loss 0.04\n",
      "| epoch 23 |  iter 261 / 351 | time 6[s] | loss 0.05\n",
      "| epoch 23 |  iter 281 / 351 | time 7[s] | loss 0.04\n",
      "| epoch 23 |  iter 301 / 351 | time 7[s] | loss 0.03\n",
      "| epoch 23 |  iter 321 / 351 | time 8[s] | loss 0.03\n",
      "| epoch 23 |  iter 341 / 351 | time 8[s] | loss 0.02\n",
      "Q   58+77\n",
      "T 162 \n",
      "O 162 \n",
      "---\n",
      "Q 461+579\n",
      "T 1139\n",
      "O 1139\n",
      "---\n",
      "Q  48+285\n",
      "T 666 \n",
      "O 666 \n",
      "---\n",
      "Q   551+8\n",
      "T 163 \n",
      "O 163 \n",
      "---\n",
      "Q  55+763\n",
      "T 422 \n",
      "O 422 \n",
      "---\n",
      "Q 752+006\n",
      "T 857 \n",
      "O 857 \n",
      "---\n",
      "Q 292+167\n",
      "T 1053\n",
      "O 1053\n",
      "---\n",
      "Q 795+038\n",
      "T 1427\n",
      "O 1427\n",
      "---\n",
      "Q  838+62\n",
      "T 864 \n",
      "O 864 \n",
      "---\n",
      "Q  39+341\n",
      "T 236 \n",
      "O 236 \n",
      "---\n",
      "val acc 97.020%\n",
      "| epoch 24 |  iter 1 / 351 | time 0[s] | loss 0.01\n",
      "| epoch 24 |  iter 21 / 351 | time 0[s] | loss 0.02\n",
      "| epoch 24 |  iter 41 / 351 | time 1[s] | loss 0.02\n",
      "| epoch 24 |  iter 61 / 351 | time 1[s] | loss 0.02\n",
      "| epoch 24 |  iter 81 / 351 | time 2[s] | loss 0.02\n",
      "| epoch 24 |  iter 101 / 351 | time 2[s] | loss 0.01\n",
      "| epoch 24 |  iter 121 / 351 | time 3[s] | loss 0.01\n",
      "| epoch 24 |  iter 141 / 351 | time 3[s] | loss 0.01\n",
      "| epoch 24 |  iter 161 / 351 | time 4[s] | loss 0.01\n",
      "| epoch 24 |  iter 181 / 351 | time 4[s] | loss 0.01\n",
      "| epoch 24 |  iter 201 / 351 | time 5[s] | loss 0.01\n",
      "| epoch 24 |  iter 221 / 351 | time 5[s] | loss 0.01\n",
      "| epoch 24 |  iter 241 / 351 | time 6[s] | loss 0.01\n",
      "| epoch 24 |  iter 261 / 351 | time 6[s] | loss 0.01\n",
      "| epoch 24 |  iter 281 / 351 | time 7[s] | loss 0.01\n",
      "| epoch 24 |  iter 301 / 351 | time 7[s] | loss 0.01\n",
      "| epoch 24 |  iter 321 / 351 | time 8[s] | loss 0.01\n",
      "| epoch 24 |  iter 341 / 351 | time 8[s] | loss 0.01\n",
      "Q   58+77\n",
      "T 162 \n",
      "O 162 \n",
      "---\n",
      "Q 461+579\n",
      "T 1139\n",
      "O 1139\n",
      "---\n",
      "Q  48+285\n",
      "T 666 \n",
      "O 666 \n",
      "---\n",
      "Q   551+8\n",
      "T 163 \n",
      "O 163 \n",
      "---\n",
      "Q  55+763\n",
      "T 422 \n",
      "O 422 \n",
      "---\n",
      "Q 752+006\n",
      "T 857 \n",
      "O 857 \n",
      "---\n",
      "Q 292+167\n",
      "T 1053\n",
      "O 1053\n",
      "---\n",
      "Q 795+038\n",
      "T 1427\n",
      "O 1427\n",
      "---\n",
      "Q  838+62\n",
      "T 864 \n",
      "O 864 \n",
      "---\n",
      "Q  39+341\n",
      "T 236 \n",
      "O 236 \n",
      "---\n",
      "val acc 99.140%\n",
      "| epoch 25 |  iter 1 / 351 | time 0[s] | loss 0.01\n",
      "| epoch 25 |  iter 21 / 351 | time 0[s] | loss 0.01\n",
      "| epoch 25 |  iter 41 / 351 | time 1[s] | loss 0.01\n",
      "| epoch 25 |  iter 61 / 351 | time 1[s] | loss 0.01\n",
      "| epoch 25 |  iter 81 / 351 | time 2[s] | loss 0.01\n",
      "| epoch 25 |  iter 101 / 351 | time 2[s] | loss 0.01\n",
      "| epoch 25 |  iter 121 / 351 | time 3[s] | loss 0.01\n",
      "| epoch 25 |  iter 141 / 351 | time 3[s] | loss 0.01\n",
      "| epoch 25 |  iter 161 / 351 | time 4[s] | loss 0.01\n",
      "| epoch 25 |  iter 181 / 351 | time 4[s] | loss 0.01\n",
      "| epoch 25 |  iter 201 / 351 | time 5[s] | loss 0.01\n",
      "| epoch 25 |  iter 221 / 351 | time 5[s] | loss 0.01\n",
      "| epoch 25 |  iter 241 / 351 | time 6[s] | loss 0.02\n",
      "| epoch 25 |  iter 261 / 351 | time 6[s] | loss 0.03\n",
      "| epoch 25 |  iter 281 / 351 | time 7[s] | loss 0.06\n",
      "| epoch 25 |  iter 301 / 351 | time 7[s] | loss 0.08\n",
      "| epoch 25 |  iter 321 / 351 | time 8[s] | loss 0.07\n",
      "| epoch 25 |  iter 341 / 351 | time 8[s] | loss 0.04\n",
      "Q   58+77\n",
      "T 162 \n",
      "O 162 \n",
      "---\n",
      "Q 461+579\n",
      "T 1139\n",
      "O 1139\n",
      "---\n",
      "Q  48+285\n",
      "T 666 \n",
      "O 666 \n",
      "---\n",
      "Q   551+8\n",
      "T 163 \n",
      "O 163 \n",
      "---\n",
      "Q  55+763\n",
      "T 422 \n",
      "O 422 \n",
      "---\n",
      "Q 752+006\n",
      "T 857 \n",
      "O 857 \n",
      "---\n",
      "Q 292+167\n",
      "T 1053\n",
      "O 1053\n",
      "---\n",
      "Q 795+038\n",
      "T 1427\n",
      "O 1427\n",
      "---\n",
      "Q  838+62\n",
      "T 864 \n",
      "O 864 \n",
      "---\n",
      "Q  39+341\n",
      "T 236 \n",
      "O 236 \n",
      "---\n",
      "val acc 94.540%\n"
     ]
    }
   ],
   "source": [
    "acc_list = []\n",
    "for epoch in range(max_epoch):\n",
    "    trainer.fit(x_train, t_train, max_epoch=1,\n",
    "                batch_size=batch_size, max_grad=max_grad)\n",
    "\n",
    "    correct_num = 0\n",
    "    for i in range(len(x_test)):\n",
    "        question, correct = x_test[[i]], t_test[[i]]\n",
    "        verbose = i < 10\n",
    "        correct_num += eval_seq2seq(model, question, correct, id_to_char, verbose)\n",
    "    acc = float(correct_num) / len(x_test)\n",
    "    acc_list.append(acc)\n",
    "    print('val acc %.3f%%' % (acc * 100))"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-05-09T17:31:49.373844600Z",
     "start_time": "2023-05-09T17:24:29.826992700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [],
   "metadata": {
    "collapsed": false
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
